package io.zbus.data.spider;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple recursive crawler: downloads a seed URL, hands the resulting
 * {@link Document} to a {@link SpiderHandler} (or a JavaScript handler file),
 * and then crawls every URL the handler returns that has not been downloaded
 * before.
 *
 * <p>Typical usage: {@link #setUrlSeed(String)}, then {@link #setHandler(SpiderHandler)}
 * or {@link #setJsHandler(String)}, then {@link #start()}.
 */
public class Spider {
	private static final Logger logger = LoggerFactory.getLogger(Spider.class);

	/** URL the crawl starts from; set through {@link #setUrlSeed(String)}. */
	protected String urlSeed;
	/** URLs that were downloaded successfully; only the keys are meaningful. */
	protected Map<String, Object> urlHistory = new ConcurrentHashMap<>();
	protected Downloader downloader = new Downloader();
	/** {@code scheme://host[:port]} derived from the seed URL. */
	protected String baseUrl;
	/** Shared key/value state passed to the handler on every page. */
	protected Map<String, Object> context = new ConcurrentHashMap<>();

	protected SpiderHandler handler;

	/**
	 * Delegates a downloaded page to the configured handler.
	 *
	 * @param doc downloaded page
	 * @param ctx shared crawl context handed to the handler
	 * @return url strings to download and parse again, or {@code null} when
	 *         there is nothing more to crawl (including the case where no
	 *         handler is configured, in which the page is only logged)
	 */
	public List<String> parse(Document doc, Map<String, Object> ctx) { // change to call js
		if (handler == null) {
			logger.info("{}", doc);
			return null;
		}
		return handler.handle(doc, ctx);
	}

	/**
	 * Downloads {@code url}, parses it and recursively processes every URL the
	 * parser returns. URLs already present in {@link #urlHistory} are skipped.
	 * A URL is recorded in the history only after a successful download, so a
	 * URL whose download failed is attempted again if it is encountered later.
	 *
	 * <p>Follow-up URLs are handled by direct recursion, so the call depth
	 * grows with the length of the link chain being followed.
	 *
	 * @param url URL to crawl; {@code null} or empty values are skipped
	 */
	protected void handleUrl(String url) {
		if (url == null || url.isEmpty()) {
			// ConcurrentHashMap rejects null keys; one bad entry returned by a
			// handler must not abort the whole crawl.
			logger.warn("Skipping null/empty url");
			return;
		}
		if (urlHistory.containsKey(url)) {
			return; // already done
		}
		try {
			Document doc = downloader.download(url);
			urlHistory.put(url, true);
			List<String> nexts = parse(doc, context);
			if (nexts == null) {
				return;
			}
			for (String nextUrl : nexts) {
				handleUrl(nextUrl);
			}
		} catch (IOException e) {
			logger.error("Failed to download {}", url, e);
		}
	}

	/**
	 * Starts crawling from the seed URL.
	 *
	 * @throws IllegalStateException if no seed URL has been set
	 */
	public void start() {
		if (this.urlSeed == null) {
			throw new IllegalStateException("urlSeed is not set; call setUrlSeed(...) before start()");
		}
		handleUrl(this.urlSeed);
	}

	/**
	 * Sets the seed URL and derives {@link #baseUrl} ({@code scheme://host[:port]})
	 * from it. The base URL is also stored in the current context under the key
	 * {@code "baseUrl"}. Nothing is modified when the seed is rejected.
	 *
	 * @param urlSeed absolute URL to start crawling from
	 * @throws NullPointerException if {@code urlSeed} is {@code null}
	 * @throws IllegalArgumentException if {@code urlSeed} is not a valid URI or
	 *         has no scheme or no authority
	 */
	public void setUrlSeed(String urlSeed) {
		if (urlSeed == null) {
			throw new NullPointerException("urlSeed must not be null");
		}
		URI uri;
		try {
			uri = new URI(urlSeed);
		} catch (URISyntaxException e) {
			throw new IllegalArgumentException(e.getMessage(), e);
		}

		String scheme = uri.getScheme();
		String host = uri.getHost();
		String authority;
		if (host != null) {
			// Keep an explicit port, otherwise links built from baseUrl would
			// target the default port instead of the seed's server.
			int port = uri.getPort();
			authority = (port == -1) ? host : host + ":" + port;
		} else {
			// getHost() is undefined when java.net.URI cannot parse the
			// authority as server-based; fall back to the raw authority.
			authority = uri.getAuthority();
		}
		if (scheme == null || authority == null) {
			throw new IllegalArgumentException(
					"urlSeed must be an absolute URL with a host: " + urlSeed);
		}

		// Assign only after validation so a rejected seed leaves state untouched.
		this.urlSeed = urlSeed;
		this.baseUrl = scheme + "://" + authority;
		context.put("baseUrl", baseUrl);
	}

	/**
	 * Replaces the crawl context with the given map (the reference is kept, not
	 * copied). The {@code "baseUrl"} entry written by {@link #setUrlSeed(String)}
	 * lives in the previous map and is not carried over by this call.
	 *
	 * @param context new context map; {@code null} resets to an empty context
	 */
	public void setContext(Map<String, Object> context) {
		// Never leave the field null: addContext/setUrlSeed write to it.
		this.context = (context != null) ? context : new ConcurrentHashMap<String, Object>();
	}

	/**
	 * Adds or replaces a single entry in the crawl context.
	 *
	 * @param key   context key
	 * @param value context value
	 */
	public void addContext(String key, Object value) {
		this.context.put(key, value);
	}

	/**
	 * Sets the handler invoked for every downloaded page.
	 *
	 * @param handler page handler; when {@code null}, pages are only logged
	 */
	public void setHandler(SpiderHandler handler) {
		this.handler = handler;
	}

	/**
	 * Installs a handler that delegates page processing to a JavaScript file
	 * through {@link JsParser}. Before each invocation the current base URL is
	 * placed in the context under {@code "baseUrl"}. Any exception raised by
	 * the script is logged and treated as "no further URLs".
	 *
	 * @param jsHandlerFile JavaScript handler file passed to the parser
	 */
	public void setJsHandler(String jsHandlerFile) {
		final JsParser parser = new JsParser();
		this.handler = new SpiderHandler() {
			@SuppressWarnings("unchecked")
			@Override
			public List<String> handle(Document doc, Map<String, Object> ctx) {
				try {
					if (baseUrl != null) {
						// Skip when no seed was set: a null value would be
						// rejected by ConcurrentHashMap before the script runs.
						ctx.put("baseUrl", baseUrl);
					}
					Object urls = parser.parse(doc, jsHandlerFile, ctx);
					if (urls instanceof List) {
						// Element types are whatever the script produced.
						return (List<String>) urls;
					}
				} catch (Exception e) {
					logger.error(e.getMessage(), e);
				}
				return null;
			}
		};
	}
}
